Last compiled: 2021-01-03
Goal
In the previous section, we predicted whether or not a product will be put on ‘backorder’ status using an H2O model. We now use the H2O models we developed to inspect, visualize, and communicate their performance to business stakeholders.
These are some relevant questions to ponder:
cowplot package?
For this, I will be reusing the Product Backorders data set (source of raw data is linked below). You may download the data in case you want to try this code on your own.
Please note this is a continuation of the previous section.
Raw data source:
Download product_backorders.csv
cowplot package
As a first step, please load tidyverse and tidymodels libraries. For details on what these libraries offer, please refer to the comments in the code block below.
# STEP 1: Load Libraries ---
# Tidy, Transform, & Visualize
library(tidyverse)
# library(tibble) --> is a modern re-imagining of the data frame
# library(readr) --> provides a fast and friendly way to read rectangular data like csv
# library(dplyr) --> provides a grammar of data manipulation
# library(magrittr) --> offers a set of operators which make your code more readable (pipe operator)
# library(tidyr) --> provides a set of functions that help you get to tidy data
# library(stringr) --> provides a cohesive set of functions designed to make working with strings as easy as possible
# library(ggplot2) --> graphics
library(tidymodels)
# library(rsample) --> provides infrastructure for efficient data splitting, resampling and cross validation.
# library(parsnip) --> provides an API to many powerful modeling algorithms in R.
# library(recipes) --> tidy interface to data pre-processing (making statistical transformations) tools for feature engineering (prior to modeling).
# library(workflows) --> bundle your pre-processing, modeling, and post-processing together.
# library(tune) --> helps you optimize the hyperparameters of your model and pre-processing steps.
# library(yardstick) --> measures the effectiveness of models using performance metrics (metrics for model comparison).
# library(broom) --> converts the information in common statistical R objects into user-friendly, predictable formats.
# library(dials) --> creates and manages tuning parameters and parameter grids.
library(h2o) # H2O modeling
library(ggthemes) # Better themes for plotting and color palettes
library(glue) # Implementation of interpreted string literals
library(cowplot) # Provides various features to help create publication-quality figures
If you haven’t installed these packages, please install them by calling install.packages("name_of_package") in the R console. After installing, run the above code block again.
# Visualize the H2O leaderboard to help with model selection.
# Keeps auc and logloss, tags each model with its algorithm type, and reshapes
# the metrics into long format for faceted plotting.
data_transformed_tbl <- automl_models_h2o@leaderboard %>%
  as_tibble() %>%
  # Drop the metrics we do not plot; auc and logloss remain
  select(-c(aucpr, mean_per_class_error, rmse, mse)) %>%
  # Model type = algorithm prefix of the model_id (text before the first "_")
  mutate(model_type = str_extract(model_id, "[^_]+")) %>%
  # Leaderboard position (1 = best model), used to label the models below.
  # NOTE: the original `slice(1:n())` before this step was a no-op and removed.
  rownames_to_column(var = "rowname") %>%
  # Visually this step will not change anything:
  # it reorders the factor levels under the hood (sorted by AUC)
  mutate(
    model_id = as_factor(model_id) %>% reorder(auc),
    model_type = as.factor(model_type)
  ) %>%
  pivot_longer(cols = -c(model_id, model_type, rowname),
               names_to = "key",
               values_to = "value",
               names_transform = list(key = forcats::fct_inorder)
  ) %>%
  # Prefix the leaderboard position so the y-axis reads "1. <model_id>"
  mutate(model_id = paste0(rowname, ". ", model_id) %>% as_factor() %>% fct_rev())
# Perform visualization: dot-plot of each model's AUC and logloss,
# faceted by metric, colored by algorithm type
data_transformed_tbl %>%
  ggplot(aes(value, model_id, color = model_type)) +
  geom_point(size = 3) +
  # hjust is a fixed setting, not a data mapping, so it belongs outside aes()
  geom_label(aes(label = round(value, 3)), hjust = "inward", show.legend = FALSE) +
  scale_color_gdocs() +
  # Facet to break out logloss and auc
  facet_wrap(~ toupper(key), scales = "free_x") +
  labs(title = "Leaderboard Metrics",
       subtitle = paste0("Ordered by: ", "AUC (Area Under the Curve)"),
       y = "Model Position, Model ID", x = "") +
  theme(legend.position = "bottom")
# Extract an H2O model name (model_id) by leaderboard position so it can be
# fed straight into h2o.getModel().
#
# h2o_leaderboard: an H2O leaderboard (anything coercible to a tibble
#                  containing a model_id column)
# n:               leaderboard row to extract (1 = best model)
# verbose:         if TRUE, message() the extracted model name
extract_h2o_model_name_by_position <- function(h2o_leaderboard, n = 1, verbose = TRUE) {
  model_name <- h2o_leaderboard %>%
    as_tibble() %>%  # as.tibble() is deprecated; as_tibble() is the current API
    slice(n) %>%
    pull(model_id)
  if (verbose) message(model_name)
  model_name
}
# Persist a selection of leaderboard models to disk:
# the top five (positions 1-5) and positions 12-16.
for (position in c(1:5, 12:16)) {
  model_name <- automl_models_h2o@leaderboard %>%
    extract_h2o_model_name_by_position(position)
  model_name %>%
    h2o.getModel() %>%
    h2o.saveModel(path = "../h2o_models/03/")
}
# Loading the Distributed Random Forest model saved above
drf_h2o <- h2o.loadModel("../h2o_models/03/DRF_1_AutoML_20210103_051847")
# Printing the model shows its metrics on the training data set
drf_h2o
## Model Details:
## ==============
##
## H2OBinomialModel: drf
## Model ID: DRF_1_AutoML_20210103_051847
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth max_depth mean_depth min_leaves
## 1 1 1 6520 20 20 20.00000 510
## max_leaves mean_leaves
## 1 510 510.00000
##
##
## H2OBinomialMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 0.1070577
## RMSE: 0.3271968
## LogLoss: 2.084588
## Mean Per-Class Error: 0.3201186
## AUC: 0.7070591
## AUCPR: 0.3164136
## Gini: 0.4141182
## R^2: -0.01608532
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 3113 388 0.110825 =388/3501
## Yes 252 224 0.529412 =252/476
## Totals 3365 612 0.160925 =640/3977
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.217391 0.411765 26
## 2 max f2 0.086505 0.475063 36
## 3 max f0point5 0.565217 0.419054 12
## 4 max accuracy 0.565217 0.881318 12
## 5 max precision 0.565217 0.508696 12
## 6 max recall 0.000000 1.000000 53
## 7 max specificity 1.000000 0.974293 0
## 8 max absolute_mcc 0.217391 0.323629 26
## 9 max min_per_class_accuracy 0.047619 0.669237 41
## 10 max mean_per_class_accuracy 0.090909 0.688723 35
## 11 max tns 1.000000 3411.000000 0
## 12 max fns 1.000000 396.000000 0
## 13 max fps 0.000000 3501.000000 53
## 14 max tps 0.000000 476.000000 53
## 15 max tnr 1.000000 0.974293 0
## 16 max fnr 1.000000 0.831933 0
## 17 max fpr 0.000000 1.000000 53
## 18 max tpr 0.000000 1.000000 53
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: drf
## ** Reported on validation data. **
##
## MSE: 0.1116927
## RMSE: 0.3342045
## LogLoss: 2.111451
## Mean Per-Class Error: 0.3248082
## AUC: 0.7180418
## AUCPR: 0.311021
## Gini: 0.4360836
## R^2: -0.03863349
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 2740 360 0.116129 =360/3100
## Yes 231 202 0.533487 =231/433
## Totals 2971 562 0.167280 =591/3533
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.238095 0.406030 24
## 2 max f2 0.043478 0.489218 41
## 3 max f0point5 0.400901 0.411311 14
## 4 max accuracy 0.666667 0.873196 6
## 5 max precision 0.565217 0.461905 11
## 6 max recall 0.000000 1.000000 52
## 7 max specificity 1.000000 0.970000 0
## 8 max absolute_mcc 0.400901 0.317417 14
## 9 max min_per_class_accuracy 0.081197 0.674194 36
## 10 max mean_per_class_accuracy 0.142857 0.696484 30
## 11 max tns 1.000000 3007.000000 0
## 12 max fns 1.000000 362.000000 0
## 13 max fps 0.000000 3100.000000 52
## 14 max tps 0.000000 433.000000 52
## 15 max tnr 1.000000 0.970000 0
## 16 max fnr 1.000000 0.836028 0
## 17 max fpr 0.000000 1.000000 52
## 18 max tpr 0.000000 1.000000 52
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: drf
## ** Reported on cross-validation data. **
## ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 0.09988042
## RMSE: 0.3160386
## LogLoss: 1.539737
## Mean Per-Class Error: 0.2888259
## AUC: 0.7533929
## AUCPR: 0.3507882
## Gini: 0.5067858
## R^2: 0.04595402
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 8335 1144 0.120688 =1144/9479
## Yes 584 694 0.456964 =584/1278
## Totals 8919 1838 0.160640 =1728/10757
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.203247 0.445443 216
## 2 max f2 0.115355 0.532363 280
## 3 max f0point5 0.346205 0.415995 139
## 4 max accuracy 0.706667 0.883890 24
## 5 max precision 0.803571 0.536785 10
## 6 max recall 0.000000 1.000000 399
## 7 max specificity 1.000000 0.983226 0
## 8 max absolute_mcc 0.203247 0.363067 216
## 9 max min_per_class_accuracy 0.058177 0.708092 335
## 10 max mean_per_class_accuracy 0.115355 0.726755 280
## 11 max tns 1.000000 9320.000000 0
## 12 max fns 1.000000 1108.000000 0
## 13 max fps 0.000000 9479.000000 399
## 14 max tps 0.000000 1278.000000 399
## 15 max tnr 1.000000 0.983226 0
## 16 max fnr 1.000000 0.866980 0
## 17 max fpr 0.000000 1.000000 399
## 18 max tpr 0.000000 1.000000 399
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid cv_4_valid cv_5_valid
## accuracy 0.8405665 0.020590737 0.8327138 0.8745353 0.8191539 0.8405393 0.8358903
## auc 0.75814676 0.030287553 0.7302061 0.73196036 0.748319 0.7956575 0.7845909
## aucpr 0.3563339 0.033249356 0.32944146 0.33736223 0.3308087 0.3836862 0.400371
## err 0.1594335 0.020590737 0.16728625 0.12546468 0.18084612 0.15946071 0.16410972
## err_count 343.0 44.254944 360.0 270.0 389.0 343.0 353.0
##
## ---
## mean sd cv_1_valid cv_2_valid cv_3_valid cv_4_valid cv_5_valid
## pr_auc 0.3563339 0.033249356 0.32944146 0.33736223 0.3308087 0.3836862 0.400371
## precision 0.38853487 0.049624257 0.3631579 0.46902654 0.33660933 0.39054728 0.38333333
## r2 0.04606942 0.09541021 -0.09422203 0.0040284377 0.06073779 0.11129339 0.14850952
## recall 0.547068 0.08618609 0.5390625 0.4140625 0.53515625 0.6156863 0.6313726
## rmse 0.31571668 0.015874654 0.33864886 0.32308763 0.31381783 0.304739 0.29829004
## specificity 0.8801538 0.032171927 0.87236285 0.93670887 0.8575198 0.8707806 0.86339664
# We want to see how it performs for the testing data frame.
# Make sure to convert it to an h2o object first with as.h2o()
h2o.performance(drf_h2o, newdata = as.h2o(test_tbl))
##
|
| | 0%
|
|=================================================================================================| 100%
## H2OBinomialMetrics: drf
##
## MSE: 0.1095157
## RMSE: 0.3309315
## LogLoss: 2.089339
## Mean Per-Class Error: 0.324444
## AUC: 0.715335
## AUCPR: 0.3011976
## Gini: 0.43067
## R^2: -0.06382155
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 3699 509 0.120960 =509/4208
## Yes 293 262 0.527928 =293/555
## Totals 3992 771 0.168381 =802/4763
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.217391 0.395173 25
## 2 max f2 0.057143 0.477204 39
## 3 max f0point5 0.375000 0.393346 15
## 4 max accuracy 0.900000 0.878228 1
## 5 max precision 0.565217 0.446254 11
## 6 max recall 0.000000 1.000000 52
## 7 max specificity 1.000000 0.969819 0
## 8 max absolute_mcc 0.260870 0.306587 21
## 9 max min_per_class_accuracy 0.060606 0.673479 38
## 10 max mean_per_class_accuracy 0.090909 0.690192 34
## 11 max tns 1.000000 4081.000000 0
## 12 max fns 1.000000 454.000000 0
## 13 max fps 0.000000 4208.000000 52
## 14 max tps 0.000000 555.000000 52
## 15 max tnr 1.000000 0.969819 0
## 16 max fnr 1.000000 0.818018 0
## 17 max fpr 0.000000 1.000000 52
## 18 max tpr 0.000000 1.000000 52
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
# Hyperparameter grid search over the number of trees for a DRF model
drf_grid_01 <- h2o.grid(
# See help page for available algorithms via ?h2o.grid()
algorithm = "randomForest",
# Reuse the object name as the grid id
grid_id = "drf_grid_01",
# predictor and response variables
x = x,
y = y,
# training and validation frames, plus 5-fold cross validation
training_frame = train_h2o,
validation_frame = valid_h2o,
nfolds = 5,
# Hyperparameters: Use drf_h2o@allparameters to see all
hyper_params = list(
# Use different numbers of trees to find a better model
ntrees = c(5, 10, 15, 20, 50, 60, 70, 120, 140, 160, 250)
)
)
# Print the grid results, ordered by increasing logloss
drf_grid_01
## H2O Grid Details
## ================
##
## Grid ID: drf_grid_01
## Used hyper parameters:
## - ntrees
## Number of models: 11
## Number of failed models: 0
##
## Hyper-Parameter Search Summary: ordered by increasing logloss
## ntrees model_ids logloss
## 1 70 drf_grid_01_model_7 0.20399401167510037
## 2 160 drf_grid_01_model_10 0.2045564443342147
## 3 250 drf_grid_01_model_11 0.20532547514236568
## 4 140 drf_grid_01_model_9 0.20586362368499225
## 5 60 drf_grid_01_model_6 0.2063439432279778
## 6 120 drf_grid_01_model_8 0.20967492068659238
## 7 50 drf_grid_01_model_5 0.21273478346606253
## 8 20 drf_grid_01_model_4 0.21473287260872115
## 9 10 drf_grid_01_model_2 0.2309443070196899
## 10 15 drf_grid_01_model_3 0.23845416756891688
## 11 5 drf_grid_01_model_1 0.3424071944849166
# Same grid, re-sorted: ordered by decreasing auc
h2o.getGrid(grid_id = "drf_grid_01", sort_by = "auc", decreasing = TRUE)
## H2O Grid Details
## ================
##
## Grid ID: drf_grid_01
## Used hyper parameters:
## - ntrees
## Number of models: 11
## Number of failed models: 0
##
## Hyper-Parameter Search Summary: ordered by decreasing auc
## ntrees model_ids auc
## 1 160 drf_grid_01_model_10 0.9419872790210334
## 2 250 drf_grid_01_model_11 0.9411459084004324
## 3 70 drf_grid_01_model_7 0.9399943636216852
## 4 140 drf_grid_01_model_9 0.9397240188797211
## 5 60 drf_grid_01_model_6 0.9389269352679946
## 6 120 drf_grid_01_model_8 0.935441510522973
## 7 50 drf_grid_01_model_5 0.9344805691058119
## 8 20 drf_grid_01_model_4 0.9284187383328703
## 9 10 drf_grid_01_model_2 0.9161548277132169
## 10 15 drf_grid_01_model_3 0.9101385634433484
## 11 5 drf_grid_01_model_1 0.8786561959465294
# Retrieve the best grid model (highest AUC in the sorted grid) by its id
drf_grid_01_model_10 <- h2o.getModel("drf_grid_01_model_10")
# Compare AUC across training, validation, and cross-validation data
# (use TRUE/FALSE, never T/F, which are reassignable)
drf_grid_01_model_10 %>% h2o.auc(train = TRUE, valid = TRUE, xval = TRUE)
## train valid xval
## 0.9388088 0.9478209 0.9419873
# The model is not overfitting because there's a small difference between the
# training AUC and the validation / cross validation AUC
# Run it with test data and compare to the results from "drf_h2o" model above
drf_grid_01_model_10 %>%
  h2o.performance(newdata = as.h2o(test_tbl))
##
|
| | 0%
|
|=================================================================================================| 100%
## H2OBinomialMetrics: drf
##
## MSE: 0.05961512
## RMSE: 0.2441621
## LogLoss: 0.2041201
## Mean Per-Class Error: 0.1731258
## AUC: 0.9393579
## AUCPR: 0.706397
## Gini: 0.8787158
## R^2: 0.4209062
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 4002 206 0.048954 =206/4208
## Yes 165 390 0.297297 =165/555
## Totals 4167 596 0.077892 =371/4763
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.317803 0.677672 162
## 2 max f2 0.202572 0.743029 216
## 3 max f0point5 0.393566 0.703851 130
## 4 max accuracy 0.378090 0.927987 136
## 5 max precision 0.894996 1.000000 0
## 6 max recall 0.011884 1.000000 378
## 7 max specificity 0.894996 1.000000 0
## 8 max absolute_mcc 0.317803 0.633957 162
## 9 max min_per_class_accuracy 0.176536 0.864068 232
## 10 max mean_per_class_accuracy 0.202572 0.867033 216
## 11 max tns 0.894996 4208.000000 0
## 12 max fns 0.894996 554.000000 0
## 13 max fps 0.000997 4208.000000 399
## 14 max tps 0.011884 555.000000 378
## 15 max tnr 0.894996 1.000000 0
## 16 max fnr 0.894996 0.998198 0
## 17 max fpr 0.000997 1.000000 399
## 18 max tpr 0.011884 1.000000 378
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
# Loading top H2O model (the AutoML leader: a stacked ensemble)
stacked_ensemble_h2o <- h2o.loadModel("../h2o_models/03/StackedEnsemble_AllModels_AutoML_20210103_051847")
# Score it against the hold-out test set
performance_h2o <- h2o.performance(stacked_ensemble_h2o, newdata = as.h2o(test_tbl))
# The performance object is an S4 object with the slots listed below
typeof(performance_h2o)
## [1] "S4"
performance_h2o %>% slotNames()
## [1] "algorithm" "on_train" "on_valid" "on_xval" "metrics"
# Extract the per-threshold performance metrics as a tibble
performance_tbl <- performance_h2o %>%
  h2o.metric() %>%
  as_tibble()  # as.tibble() is deprecated; as_tibble() is the current API
performance_tbl %>%
  glimpse()
## Rows: 400
## Columns: 20
## $ threshold [3m[38;5;246m<dbl>[39m[23m 0.9896328, 0.9849924, 0.9811434, 0.9787006, 0.9764420, 0.9743949, 0.9722…
## $ f1 [3m[38;5;246m<dbl>[39m[23m 0.003597122, 0.007181329, 0.021390374, 0.024911032, 0.042253521, 0.04561…
## $ f2 [3m[38;5;246m<dbl>[39m[23m 0.002251238, 0.004500450, 0.013477089, 0.015716210, 0.026869682, 0.02908…
## $ f0point5 [3m[38;5;246m<dbl>[39m[23m 0.008944544, 0.017761989, 0.051813472, 0.060034305, 0.098846787, 0.10569…
## $ accuracy [3m[38;5;246m<dbl>[39m[23m 0.8836868, 0.8838967, 0.8847365, 0.8849465, 0.8857863, 0.8857863, 0.8864…
## $ precision [3m[38;5;246m<dbl>[39m[23m 1.0000000, 1.0000000, 1.0000000, 1.0000000, 0.9230769, 0.8666667, 0.8888…
## $ recall [3m[38;5;246m<dbl>[39m[23m 0.001801802, 0.003603604, 0.010810811, 0.012612613, 0.021621622, 0.02342…
## $ specificity [3m[38;5;246m<dbl>[39m[23m 1.0000000, 1.0000000, 1.0000000, 1.0000000, 0.9997624, 0.9995247, 0.9995…
## $ absolute_mcc [3m[38;5;246m<dbl>[39m[23m 0.03990219, 0.05643614, 0.09779135, 0.10563783, 0.13150867, 0.13141073, …
## $ min_per_class_accuracy [3m[38;5;246m<dbl>[39m[23m 0.001801802, 0.003603604, 0.010810811, 0.012612613, 0.021621622, 0.02342…
## $ mean_per_class_accuracy [3m[38;5;246m<dbl>[39m[23m 0.5009009, 0.5018018, 0.5054054, 0.5063063, 0.5106920, 0.5114741, 0.5141…
## $ tns [3m[38;5;246m<dbl>[39m[23m 4208, 4208, 4208, 4208, 4207, 4206, 4206, 4206, 4206, 4206, 4206, 4205, …
## $ fns [3m[38;5;246m<dbl>[39m[23m 554, 553, 549, 548, 543, 542, 539, 535, 533, 530, 527, 524, 523, 522, 52…
## $ fps [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 7, 8, 8, 8, 11, 13, 1…
## $ tps [3m[38;5;246m<dbl>[39m[23m 1, 2, 6, 7, 12, 13, 16, 20, 22, 25, 28, 31, 32, 33, 34, 37, 39, 41, 42, …
## $ tnr [3m[38;5;246m<dbl>[39m[23m 1.0000000, 1.0000000, 1.0000000, 1.0000000, 0.9997624, 0.9995247, 0.9995…
## $ fnr [3m[38;5;246m<dbl>[39m[23m 0.9981982, 0.9963964, 0.9891892, 0.9873874, 0.9783784, 0.9765766, 0.9711…
## $ fpr [3m[38;5;246m<dbl>[39m[23m 0.0000000000, 0.0000000000, 0.0000000000, 0.0000000000, 0.0002376426, 0.…
## $ tpr [3m[38;5;246m<dbl>[39m[23m 0.001801802, 0.003603604, 0.010810811, 0.012612613, 0.021621622, 0.02342…
## $ idx [3m[38;5;246m<int>[39m[23m 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20…
# Custom ggplot2 theme reused by all plots below: bottom legend,
# transparent panel with a thin black border, and a grey major grid.
# NOTE(review): the `size` argument of element_rect()/element_line() was
# renamed to `linewidth` in ggplot2 >= 3.4 — `size` matches the ggplot2
# version this was compiled with (2021); confirm before upgrading.
theme_new <- theme(
legend.position = "bottom",
legend.title = element_text(size = 11),
legend.text = element_text(size = 9),
legend.key = element_blank(),
panel.background = element_rect(fill = "transparent"),
panel.border = element_rect(color = "black", fill = NA, size = 0.5),
panel.grid.major = element_line(color = "grey", size = 0.333)
)
# Show the row at which F1 is maximized (the precision/recall trade-off point)
performance_tbl %>%
  filter(f1 == max(f1))
# Threshold that maximizes F1, marked on the plot below
max_f1_threshold <- h2o.find_threshold_by_max_metric(performance_h2o, "f1")
# Precision and recall as functions of the classification threshold
performance_tbl %>%
  ggplot(aes(x = threshold)) +
  geom_line(aes(y = precision, color = "Precision"), size = 0.5) +
  geom_line(aes(y = recall, color = "Recall"), size = 0.5) +
  scale_color_manual(
    breaks = c("Precision", "Recall"),
    values = c("blue", "red")
  ) +
  # Vertical line where precision and recall are harmonically optimized (max F1)
  geom_vline(xintercept = max_f1_threshold) +
  labs(
    title = "Precision vs. Recall",
    y = "Value",
    x = "Threshold"
  ) +
  theme_new
# Load a saved H2O model from `path`, score it on `test_tbl`, and return a
# tibble of per-threshold tpr/fpr plus the model's overall AUC.
load_model_performance_metrics <- function(path, test_tbl) {
  model <- h2o.loadModel(path)
  performance <- h2o.performance(model, newdata = as.h2o(test_tbl))
  metrics_tbl <- performance %>%
    h2o.metric() %>%
    as_tibble()
  metrics_tbl %>%
    mutate(auc = h2o.auc(performance)) %>%
    select(tpr, fpr, auc)
}
# ROC curves for every saved model in ../h2o_models/03/
model_metrics_tbl <- fs::dir_info(path = "../h2o_models/03/") %>%
  select(path) %>%
  mutate(metrics = map(path, load_model_performance_metrics, test_tbl)) %>%
  unnest(cols = metrics)
model_metrics_tbl %>%
  arrange(desc(auc)) %>%
  mutate(
    # Extract the model name: basename() is simpler and more robust than
    # indexing str_split(path, "/", simplify = TRUE)[, 4], which silently
    # breaks if the directory depth changes
    PATH = basename(path) %>% as_factor(),
    AUC = auc %>% round(3) %>% as.character() %>% as_factor()
  ) %>%
  ggplot(aes(fpr, tpr, color = PATH, linetype = AUC)) +
  geom_line(size = 0.75) +
  scale_color_gdocs() +
  # Diagonal = performance of a random (no-skill) classifier, for reference
  geom_abline(color = "black", linetype = "dotted", size = 0.75) +
  theme_minimal() +
  theme_new +
  theme(legend.direction = "vertical") +
  labs(title = "ROC (Receiver Operating Characteristic) Plot",
       subtitle = "Performance of Top 5 & Bottom 5 Performing Models",
       y = "TPR",
       x = "FPR")
# Redefinition: same as above, but also keeps precision and recall so the
# per-model precision/recall curves can be plotted.
load_model_performance_metrics <- function(path, test_tbl) {
  model <- h2o.loadModel(path)
  performance <- h2o.performance(model, newdata = as.h2o(test_tbl))
  performance %>%
    h2o.metric() %>%
    as_tibble() %>%
    mutate(auc = h2o.auc(performance)) %>%
    select(tpr, fpr, auc, precision, recall)
}
# Precision vs Recall curves for every saved model in ../h2o_models/03/
model_metrics_tbl <- fs::dir_info(path = "../h2o_models/03/") %>%
  select(path) %>%
  mutate(metrics = map(path, load_model_performance_metrics, test_tbl)) %>%
  unnest(cols = metrics)
model_metrics_tbl %>%
  arrange(desc(auc)) %>%
  mutate(
    # Extract the model name: basename() is simpler and more robust than
    # indexing str_split(path, "/", simplify = TRUE)[, 4], which silently
    # breaks if the directory depth changes
    PATH = basename(path) %>% as_factor(),
    AUC = auc %>% round(3) %>% as.character() %>% as_factor()
  ) %>%
  ggplot(aes(recall, precision, color = PATH, linetype = AUC)) +
  geom_line(size = 0.75) +
  scale_color_gdocs() +
  theme_minimal() +
  theme_new +
  theme(legend.direction = "vertical") +
  labs(title = "Precision vs Recall Plot",
       subtitle = "Performance of Top 5 & Bottom 5 Performing Models",
       y = "Precision",
       x = "Recall")
# Table for Gain and Lift plotting (per-group cumulative capture rate & lift)
gain_lift_tbl <- performance_h2o %>%
  h2o.gainsLift() %>%
  as_tibble()  # as.tibble() is deprecated; as_tibble() is the current API
## Gain Plot
gain_transformed_tbl <- gain_lift_tbl %>%
  # Keep only the columns needed for the gain chart
  # (selecting cumulative_lift and immediately dropping it via
  #  select(-contains("lift")) was redundant)
  select(group, cumulative_data_fraction, cumulative_capture_rate) %>%
  # Baseline = random targeting: gain grows linearly with the data fraction
  mutate(baseline = cumulative_data_fraction) %>%
  rename(gain = cumulative_capture_rate) %>%
  # Prepare the data for plotting (long format for the color/group aesthetics)
  pivot_longer(cols = c(gain, baseline), values_to = "value", names_to = "key")
gain_transformed_tbl %>%
  ggplot(aes(x = cumulative_data_fraction, y = value, color = key)) +
  geom_line(size = 0.5) +
  scale_color_gdocs() +
  theme_minimal() +
  theme_new +
  labs(title = "Gain Chart",
       x = "Cumulative Data Fraction",
       y = "Gain")
## Lift Plot
lift_transformed_tbl <- gain_lift_tbl %>%
  # Keep only the columns needed for the lift chart
  # (selecting cumulative_capture_rate and immediately dropping it via
  #  select(-contains("capture")) was redundant)
  select(group, cumulative_data_fraction, cumulative_lift) %>%
  # Baseline = random targeting: lift is constant at 1
  mutate(baseline = 1) %>%
  rename(lift = cumulative_lift) %>%
  pivot_longer(cols = c(lift, baseline), values_to = "value", names_to = "key")
lift_transformed_tbl %>%
  ggplot(aes(x = cumulative_data_fraction, y = value, color = key)) +
  geom_line(size = 0.5) +
  scale_color_gdocs() +
  theme_minimal() +
  theme_new +
  labs(title = "Lift Chart",
       x = "Cumulative Data Fraction",
       y = "Lift")
# Plot ROC, Precision/Recall, Gain, and Lift for the top and bottom models of
# an H2O leaderboard, scored on new data, combined into one figure via cowplot.
#
# h2o_leaderboard: an H2O AutoML leaderboard (anything coercible to a tibble)
# newdata:         data to score the models on (e.g. the test tibble)
# order_by:        metric to order models by: "auc" (descending) or
#                  "logloss" (ascending); first element is used
# top_models:      number of models taken from the top of the leaderboard
# bottom_models:   number of models taken from the bottom
# size:            line size passed to geom_line()
plot_h2o_performance <- function(h2o_leaderboard, newdata, order_by = c("auc", "logloss"),
                                 top_models = 2, bottom_models = 2, size = 1.5) {
  # Inputs: keep only the first `top_models` and last `bottom_models` rows
  leaderboard_tbl <- h2o_leaderboard %>%
    as_tibble() %>%
    slice(1:top_models, (n() - bottom_models + 1):n())
  newdata_tbl <- newdata %>%
    as_tibble()
  # Select the first choice if nothing is provided
  order_by <- tolower(order_by[[1]])
  # Convert the string stored in a variable to a column name (symbol)
  order_by_expr <- rlang::sym(order_by)
  # Higher AUC is better (descending); lower logloss is better (ascending).
  # Plain comparison replaces the scalar ifelse(cond, TRUE, FALSE) anti-pattern.
  order_desc <- order_by == "auc"
  # Turn off the progress bars (opposite of h2o.show_progress())
  h2o.no_progress()
  # 1. Model Metrics: per-threshold metrics for each leaderboard model
  get_model_performance_metrics <- function(model_id, test_tbl) {
    model_h2o <- h2o.getModel(model_id)
    perf_h2o <- h2o.performance(model_h2o, newdata = as.h2o(test_tbl))
    perf_h2o %>%
      h2o.metric() %>%
      as_tibble() %>%  # as.tibble() is deprecated
      select(threshold, tpr, fpr, precision, recall)
  }
  model_metrics_tbl <- leaderboard_tbl %>%
    mutate(metrics = map(model_id, get_model_performance_metrics, newdata_tbl)) %>%
    unnest(cols = metrics) %>%
    mutate(model_id = as_factor(model_id) %>%
             # Programmatically reorder factors depending on order_by
             fct_reorder(!! order_by_expr, .desc = order_desc),
           auc = auc %>%
             round(3) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id)),
           logloss = logloss %>%
             round(4) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id)))
  ## 1A. ROC Plot
  p1 <- model_metrics_tbl %>%
    ggplot(aes(fpr, tpr, color = model_id, linetype = !! order_by_expr)) +
    geom_line(size = size) +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    labs(title = "ROC", x = "FPR", y = "TPR") +
    theme(legend.direction = "vertical")
  ## 1B. Precision vs Recall
  p2 <- model_metrics_tbl %>%
    ggplot(aes(recall, precision, color = model_id, linetype = !! order_by_expr)) +
    geom_line(size = size) +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    labs(title = "Precision Vs Recall", x = "Recall", y = "Precision") +
    theme(legend.position = "none")
  ## 2. Gain / Lift: per-group cumulative capture rate & lift for each model
  get_gain_lift <- function(model_id, test_tbl) {
    model_h2o <- h2o.getModel(model_id)
    perf_h2o <- h2o.performance(model_h2o, newdata = as.h2o(test_tbl))
    perf_h2o %>%
      h2o.gainsLift() %>%
      as_tibble() %>%  # as.tibble() is deprecated
      select(group, cumulative_data_fraction, cumulative_capture_rate, cumulative_lift)
  }
  gain_lift_tbl <- leaderboard_tbl %>%
    mutate(metrics = map(model_id, get_gain_lift, newdata_tbl)) %>%
    unnest(cols = metrics) %>%
    mutate(model_id = as_factor(model_id) %>%
             fct_reorder(!! order_by_expr, .desc = order_desc),
           auc = auc %>%
             round(3) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id)),
           logloss = logloss %>%
             round(4) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id))) %>%
    rename(gain = cumulative_capture_rate,
           lift = cumulative_lift)
  ## 2A. Gain Plot
  p3 <- gain_lift_tbl %>%
    ggplot(aes(cumulative_data_fraction, gain,
               color = model_id, linetype = !! order_by_expr)) +
    # Trailing comma removed from geom_line(size = size,) — an empty argument
    # raises an evaluation error in R
    geom_line(size = size) +
    geom_segment(x = 0, y = 0, xend = 1, yend = 1,
                 color = "red", size = size, linetype = "dotted") +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    expand_limits(x = c(0, 1), y = c(0, 1)) +
    labs(title = "Gain", x = "Cumulative Data Fraction", y = "Gain") +
    theme(legend.position = "none")
  ## 2B. Lift Plot
  p4 <- gain_lift_tbl %>%
    ggplot(aes(cumulative_data_fraction, lift,
               color = model_id, linetype = !! order_by_expr)) +
    geom_line(size = size) +
    geom_segment(x = 0, y = 1, xend = 1, yend = 1,
                 color = "red", size = size, linetype = "dotted") +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    expand_limits(x = c(0, 1), y = c(0, 1)) +
    labs(title = "Lift", x = "Cumulative Data Fraction", y = "Lift") +
    theme(legend.position = "none")
  ### Combine using cowplot
  # cowplot::get_legend() extracts a legend from a ggplot object
  p_legend <- get_legend(p1)
  # Remove the legend from p1 (the shared legend is drawn once, below)
  p1 <- p1 + theme(legend.position = "none")
  # cowplot::plot_grid() combines multiple ggplots into a single cowplot object
  p <- cowplot::plot_grid(p1, p2, p3, p4, ncol = 2)
  # cowplot::ggdraw() sets up a drawing layer
  p_title <- ggdraw() +
    # cowplot::draw_label() draws text on a ggdraw layer / ggplot object
    draw_label(glue("Metrics for Top {top_models} & Bottom {bottom_models} H2O Models"),
               size = 18, fontface = "bold", color = "#2C3E50")
  p_subtitle <- ggdraw() +
    draw_label(glue("Ordered by {toupper(order_by)}"),
               size = 10, color = "#2C3E50")
  # Combine everything
  ret <- plot_grid(p_title, p_subtitle, p, p_legend,
                   # Adjust the relative spacing so the legend always fits
                   ncol = 1, rel_heights = c(0.05, 0.05, 1, 0.05 * (top_models + bottom_models)))
  # Restore the progress bars disabled above
  h2o.show_progress()
  ret
}
# Full performance report for the 5 best and 5 worst AutoML models,
# ordered by logloss and evaluated on the test set
automl_models_h2o@leaderboard %>%
plot_h2o_performance(newdata = test_tbl, order_by = "logloss",
size = 0.75, bottom_models = 5, top_models = 5)
R!